#-------------------------------------------------------#
#   Academic freedom and the onset of autocratization   #
#-------------------------------------------------------#

# Title: "Academic freedom and the onset of autocratization" #
# Authors: "Pelke, Lars", FAU Erlangen-Nürnberg
# date: 2023-04-24
# journal: Democratization
# DOI: 10.1080/13510347.2023.2207213
# written under "R version 4.2.3 (2023-03-15)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages

library(tidyverse)
library(performance)
library(estimatr)
library(ggeffects)
library(ggokabeito)
library(zoo)
library(marginaleffects)
library(ggpubr)
library(ggthemes)
library(modelsummary)
library(splines)
library(brglm)
library(splines)
library(lmtest)
library(sandwich)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#load V-Dem full dataset
#load(here("data_out/vdem_full_v11.Rda"))

# set you working directory here #

#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)

vdem_subset_v11 %>% 
  filter(diff_v2xca_academ >= 0.1 | diff_v2xca_academ <= -0.1) %>%
  dplyr::select(country_name, year, v2xca_academ, v2xca_academ_four,diff_v2xca_academ)

## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

ggplot(vdem_subset_v11) +
  geom_density(aes(v2xca_academ), color = "red") +
  geom_density(aes(v2xca_academ_five), color = "blue") +
  theme_bw() +
  xlab("Academic Freedom") +
  ylab("Density") 

ggsave("figures/Density_AF.pdf")
 
#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#

# start inclusion: 0.05 
# cum inclusion: 0.1
# year turn: 0.03
# cum turn: 0.1
# tolerance: 5 years

ert_data <-  read.csv("data/ERT-3.1/inst/ERT.csv")

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.01)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

#### Poland ###

poland <- vdem_subset_v11 %>%
  filter(country_name == "Poland") %>%
  dplyr::select(country_name, year, v2x_polyarchy, aut_ep, aut_ep_id, democracy_stock)

hungary <- vdem_subset_v11 %>%
  filter(country_name == "India") %>%
  dplyr::select(country_name, year, v2x_polyarchy, aut_ep, aut_ep_id, v2xca_academ, democracy_stock)

summary(vdem_subset_v11$democracy_stock)

ggplot(subset(vdem_subset_v11, country_name %in% c("United States of America", "Germany", "Poland", 
                                                   "China", "Russia", "South Africa")), aes(x = year)) +
  geom_line(aes(y = v2x_polyarchy), color = "red") +
  geom_line(aes(y = democracy_stock), color = "blue") +
  facet_wrap(~country_name, ncol = 2) +
  ylim(0,1) +
  theme_clean()

ggsave("figures/Appendix_Democratic_Stock.pdf", dpi = 900, height = 20, width = 15, units = "cm")

#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) %>%
  drop_na(v2xca_academ_lag, e_gdppclog, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
            e_poplog,region_factor ,v2x_jucon_lag, v2xlg_legcon_lag, year_1900, spell, country_id, democracy_stock_lag) 

summary(vdem_subset_v11_noNA)

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11_noNA$aut_ep_onset) # 146 autocratization onsets 

# list of all onsets #

list_of_auto_onsets <- vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  ungroup() %>%
  dplyr::select(country_name, year)

list_of_auto_onsets$year <- as.factor(list_of_auto_onsets$year)

datasummary_df(list_of_auto_onsets, 
               output = "results/Table_C2.tex", 
               title = "List of country-years with autocratization onset", 
               longtable=TRUE)

country_list_auto <- unique(list_of_auto_onsets$country_name)

#### compare number of autocratization onset in V-Dem sample ####

vdem_subset_v11 <- vdem_subset_v11 %>%
  filter(year >=1900) %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11$aut_ep_onset) # 146 autocratization onsets 

#### Summary Statistics Table B2 Supplementary Appendix ####

vdem_descriptives <- vdem_subset_v11_noNA %>%
  dplyr::select(year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag, e_gdppclog_lag, 
                e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag)

datasummary_skim(vdem_descriptives, 
                 title = "Descriptive Statisitcs Sample Study II", 
                 output = 'results/Table_C3.tex', 
                 fmt="%.3f",
                 histogram=FALSE)

##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11_noNA$aut_ep_onset_rev <- recode(vdem_subset_v11_noNA$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11_noNA$aut_ep_onset_rev)
table(vdem_subset_v11_noNA$aut_ep_onset)


vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

#---------------------------------------------------#
#--------------------- Main Models -----------------#
#---------------------------------------------------#

#### First Step: Find appropriate number of DF for splines ####

mod.linear <- brglm::brglm(as.factor(aut_ep_onset) ~  v2xca_academ_lag + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)

mod.bs3 <- brglm::brglm(as.factor(aut_ep_onset) ~ bs(v2xca_academ_lag, df = 3) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)

mod.bs4 <- brglm::brglm(as.factor(aut_ep_onset) ~ bs(v2xca_academ_lag, df = 4) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)
mod.bs5 <- brglm::brglm(as.factor(aut_ep_onset) ~ bs(v2xca_academ_lag, df = 5) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)


mod.py1 <- brglm::brglm(as.factor(aut_ep_onset) ~ poly(v2xca_academ_lag, 2) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)

mod.py2 <- brglm::brglm(as.factor(aut_ep_onset) ~ poly(v2xca_academ_lag, 3) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)


mod.py3 <- brglm::brglm(as.factor(aut_ep_onset) ~ poly(v2xca_academ_lag, 4) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)

mod.py4 <- brglm::brglm(as.factor(aut_ep_onset) ~ poly(v2xca_academ_lag, 5) + democracy_stock_lag +
                          e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                          regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                          region_factor +
                          year_1900 + year_sq, 
                        data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                        method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                        pl=T)

## Export Regression Tables with Information about Model fit ##


modelsummary::modelsummary(list(mod.linear, mod.bs3, mod.bs4, mod.bs5, mod.py1, mod.py2, mod.py3, mod.py4),
                           output = "results/TableC4.tex", 
                           gof_map = c("nobs", "AIC", "BIC", "RMSE", "F"),
                           fmt = 3, longtable = TRUE, 
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "v2xca_academ_lag" = "Academic Freedom", 
                                           "bs(v2xca_academ_lag, df = 3)1" = "Academic Freedom", 
                                           "bs(v2xca_academ_lag, df = 3)2" = "Academic Freedom K2", 
                                           "bs(v2xca_academ_lag, df = 3)3" = "Acadmeic Freedom K3", 
                                           "bs(v2xca_academ_lag, df = 4)1" = "Academic Freedom", 
                                           "bs(v2xca_academ_lag, df = 4)2" = "Academic Freedom K2", 
                                           "bs(v2xca_academ_lag, df = 4)3" = "Acadmeic Freedom K3",
                                           "bs(v2xca_academ_lag, df = 4)4" = "Academic Freedom K4", 
                                           "bs(v2xca_academ_lag, df = 5)1" = "Academic Freedom", 
                                           "bs(v2xca_academ_lag, df = 5)2" = "Academic Freedom K2", 
                                           "bs(v2xca_academ_lag, df = 5)3" = "Acadmeic Freedom K3",
                                           "bs(v2xca_academ_lag, df = 5)4" = "Academic Freedom K4", 
                                           "bs(v2xca_academ_lag, df = 5)5" = "Academic Freedom K5",
                                           "poly(v2xca_academ_lag, 2)1" = "Academic Freedom",
                                           "poly(v2xca_academ_lag, 2)2" = "Academic Freedom ^2",
                                           "poly(v2xca_academ_lag, 3)1" = "Academic Freedom",
                                           "poly(v2xca_academ_lag, 3)2" = "Academic Freedom ^2",
                                           "poly(v2xca_academ_lag, 3)3" = "Academic Freedom ^3",
                                           "poly(v2xca_academ_lag, 4)1" = "Academic Freedom",
                                           "poly(v2xca_academ_lag, 4)2" = "Academic Freedom ^2",
                                           "poly(v2xca_academ_lag, 4)3" = "Academic Freedom ^3",
                                           "poly(v2xca_academ_lag, 4)4" = "Academic Freedom ^4",
                                           "poly(v2xca_academ_lag, 5)1" = "Academic Freedom",
                                           "poly(v2xca_academ_lag, 5)2" = "Academic Freedom ^2",
                                           "poly(v2xca_academ_lag, 5)3" = "Academic Freedom ^3",
                                           "poly(v2xca_academ_lag, 5)4" = "Academic Freedom ^4",
                                           "poly(v2xca_academ_lag, 5)5" = "Academic Freedom ^5",
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))

## Compare Model fit statistics separately ##

compare_performance(mod.linear, mod.bs3, mod.bs4, mod.bs5, 
                    mod.py1, mod.py2, mod.py3, mod.py4)

compare_performance(mod.linear, mod.bs3, mod.bs4, mod.bs5, 
                    mod.py1, mod.py2, mod.py3, mod.py4, rank = T)

#### Fitting the models ####


# Run model 1,
mod.1 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1)


#Clustered SEs
ses_mod.1 <-  coeftest(mod.1, vcov = vcovCL(mod.1, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1

# Run model 1,
mod.2 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2)

#Clustered SEs
ses_mod.2 <-  coeftest(mod.2, vcov = vcovCL(mod.2, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1[,2], ses_mod.2[,2])
p.values <- list(ses_mod.1[,4], ses_mod.2[,4])


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/Table3.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/Table3.docx", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


summary(vdem_subset_v11_noNA)

# Plot marginal effects 

af_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

ggsave("figures/af_onset.pdf", af_onset, width = 18, height = 10, units = "cm", dpi = 900, device = "pdf")


democracy_stock_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                 terms = c("democracy_stock_lag[all]"),
                                 vcov.fun = "vcovCL", 
                                 vcov.type = "HC1",
                                 vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                 alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_1 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1 <- data.frame(x = af_edi_onset_1$data$x,
                             academic_freedom = af_edi_onset_1$data$group,
                             predicted = af_edi_onset_1$data$predicted,
                             lower = af_edi_onset_1$data$conf.low,
                             upper = af_edi_onset_1$data$conf.high)

af_edi_onset_1_plot <-ggplot(data = af_edi_onset_1) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom\nIndex", fill =  "Academic Freedom\nIndex") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v12_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2 <- data.frame(x = af_edi_onset_2$data$x,
                             democracy_stock_lag = af_edi_onset_2$data$group,
                             predicted = af_edi_onset_2$data$predicted,
                             lower = af_edi_onset_2$data$conf.low,
                             upper = af_edi_onset_2$data$conf.high)

af_edi_onset_2_plot <-ggplot(data = af_edi_onset_2) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

ggsave("figures/af_edi_onset_2_plot.pdf", af_edi_onset_2_plot, width = 13, height = 10, units = "cm", dpi = 900, device = "pdf")


af_edi_onset_1 %>%
  filter(x == 0.6) 

af_edi_onset_2 %>%
  filter(x == 0.4)

figure <- ggpubr::ggarrange(af_onset, democracy_stock_onset,af_edi_onset_1_plot, af_edi_onset_2_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))

ggsave("figures/M2_effect_plots_afi_four_democracy_stock.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")

## contextualization ##

vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(democracy_stock_lag >= 0.6)  %>%
  summarize(min = min(v2xca_academ_lag), 
            max = max(v2xca_academ_lag), 
            mean = mean(v2xca_academ_lag))

## Correlation between AF and Democracy ## 

ggplot(data = vdem_subset_v11_noNA, aes(x = v2xca_academ, y = v2x_polyarchy)) +
  geom_point(alpha = 0.1) +
  geom_smooth(color = "black", fill="grey", method = "lm") +
  ylab("Electooral Democracy Index") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1)) +
  scale_x_continuous(limits = c(0,1)) +
  geom_rug(alpha = .05, position = "jitter") 

ggsave("figures/Corr_AF_Polyarchy.pdf", width = 15, height = 15, units = "cm", dpi = 900, device = "pdf")
  


